# -*- coding: utf-8 -*-
import requests,re

# ##########################################
# 主程序开始
# ##########################################



'''
arrival.nbedi.com
61.175.135.173
60.12.21.5

init_url='http://61.175.135.173/MsFlat/ArrivalReportNew.aspx'
init_page=urllib2.urlopen(init_url).read()
viewstate=re.findall('<input[^>]*name=\"__VIEWSTATE\"[^>]*value=\"([^"]*)\"[^>]*>',init_page,re.DOTALL)
print viewstate
eventvalidation=re.findall('<input[^>]*name=\"__EVENTVALIDATION\"[^>]*value=\"([^"]*)\"[^>]*>',init_page,re.DOTALL)
print eventvalidation

req_data=urllib.urlencode({
    'txt_BillNO':'W232104653',
    'btn_Search':'开始查询',
    '__VIEWSTATEENCRYPTED':'',
    '__VIEWSTATE':viewstate[0],
    '__EVENTVALIDATION':eventvalidation[0]

})
req=urllib2.Request(
    url=init_url,
    data=req_data
)
'''
def cleanhtml(html):
    """Collapse whitespace in an HTML fragment and drop gaps between tags.

    Every run made of whitespace characters and/or ``&nbsp;`` entities is
    replaced by a single space; afterwards a lone space sitting between
    two tags (``> <``) is removed altogether.

    :param html: HTML text to normalise.
    :returns: the normalised text.
    """
    # Whitespace and &nbsp; are alternatives inside ONE repeated group so
    # that a mixed run such as " &nbsp; " collapses to a single space.
    # Matching them as two separate alternatives would substitute each
    # piece on its own and leave several spaces behind, which the '> <'
    # clean-up below could not remove.
    collapsed = re.sub(r'(?:\s|&nbsp;)+', ' ', html)
    # Remove the single space left between adjacent tags.
    return collapsed.replace('> <', '><')

def getReport(ContainerNO='',BillNO='',timeout=30):
    """Query the arrival-report page and return the rows of its result grid.

    Sends a GET request to ``ArrivalReportNew.aspx`` with the search form
    fields in the query string, extracts the ``GridView1`` table from the
    response and splits it into rows and cells with regular expressions.
    The row count, the raw row strings and every parsed row are printed
    along the way.

    :param ContainerNO: value sent as the ``txt_ContainerNO`` field.
    :param BillNO: value sent as the ``txt_BillNO`` field.
    :param timeout: seconds handed to ``requests.get`` so an unresponsive
        server cannot block the call indefinitely.
    :returns: a list with one entry per matched row (``<th>`` rows
        included), each entry being the list of that row's cell contents;
        an empty list when the response holds no ``GridView1`` table.
    :raises requests.RequestException: if the HTTP request itself fails.
    """
    req_url = 'http://61.175.135.173/MsFlat/ArrivalReportNew.aspx'
    # The fields go through ``params`` so that requests percent-encodes
    # them (the caller-supplied values included); a list of pairs keeps
    # the fields in a fixed order.
    # NOTE(review): __VIEWSTATE / __EVENTVALIDATION are hard-coded,
    # presumably captured from an earlier page load -- confirm the server
    # still accepts them.
    query = [
        ('__VIEWSTATE', 'e0c0uPrgm8N8wNqSew9WJqhoVwbamZKl0OKzdQ6YiP62mzmt/TWR/liZHpLWTMz1MG04CQQjdPXwm1L8iYEbcqUai5UgD17Iuppthzf99klzQ8pyzdBlgjfCvHVjsImZgXjc3SLfogHWsItLxLZ4NpYInRPDFKcaXsCAEQ=='),
        ('__VIEWSTATEENCRYPTED', ''),
        ('__EVENTVALIDATION', 'YH/PNp6wLPyLntadgs1cAgkeAT2oBDNcaamtBY0aDyGxyg4uzKtHPRTbZvziMEXQtUUph6u9xYAcS00hCGxXuCTaQKfK3Mjo5BivX8iN0RFOZBZTQWj0vi3xGP2QVoe8ilCVYA=='),
        ('txt_ContainerNO', ContainerNO),
        ('txt_ShipName', ''),
        ('txt_VoyageNo', ''),
        ('txt_StartDate', ''),
        ('txt_EndDate', ''),
        ('btn_Search', '开始查询'),
        ('Pager1_input', '1'),
        ('txt_BillNO', BillNO),
    ]
    res = requests.get(req_url, params=query, timeout=timeout).text

    tables = re.findall(r'<table[^>]*id="GridView1"[^>]*>.+?</table>', res, re.S)
    if not tables:
        # No result grid in the response, so there is nothing to parse.
        return []
    table = cleanhtml(tables[0])

    # [hd] matches exactly 'h' or 'd'; a '|' inside a character class
    # would be taken literally.
    trs = re.findall(r'<tr><t[hd].+?</t[hd]></tr>', table)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(len(trs))
    print(trs)
    rs = []
    for tr in trs:
        # (.*?) rather than (.+?): an empty cell must yield '' instead of
        # letting the match run on into the following cell.
        tds = re.findall(r'<t[hd].*?>(.*?)</t[hd]>', tr, re.S)
        # The 11th cell has its markup stripped; rows with fewer cells
        # are kept untouched.
        # NOTE(review): index 10 is taken over from the previous code --
        # confirm it still addresses the intended column.
        if len(tds) > 10:
            tds[10] = re.sub(r'<.*?>', '', tds[10])
        print(tds)
        rs.append(tds)
    return rs


# Sample run: look up one container number; getReport prints what it parses.
getReport(BillNO='', ContainerNO='TCLU2732056')
# Batch variant, disabled (getBillNOs is not defined in the visible code):
# for bill_no in getBillNOs():
#     getReport(bill_no[0])
'''

# 登录的地址
login_req = urllib2.Request(
    url = 'http://www.nbeport.gov.cn/pkmslogin.form',
    data = login_post_data
)

login_res=urllib2.urlopen(login_req).read()
# print login_res
######################################
billNo='CNNBO620368'
billNo='W232104653'

def getBillNo():
    conn=sqlite3.connect(r'g:\web\qp4\qp.db3')
    conn.text_factory = str
    conn.row_factory = sqlite3.Row
    
    billnos=conn.execute('select distinct bill_no from dec_mess_status where bill_no is not null').fetchall()
    conn.close()
    return billnos


def getQuery(billNo):
    result=urllib2.urlopen('http://www.nbeport.gov.cn/apdev/proDes/query/manifestQueryAction.do?ec_s_check_rec_time=desc&ec_crd=1000&billNo=%s&msgType=PREMFT&flag=F&methodName=doResultQuery'%billNo).read()
    # print result
    result=re.sub(r'[\t\r\n]','',result)
    # print result
    # tbody=re.search(r'<tbody class="tableBody" >(.+?)</tbody>',result,re.DOTALL).group()
    # print tbody
    trs=re.findall('<tr class="odd" >(.+?)</tr>',result,re.DOTALL)
    for tr in trs:
        tds=re.findall('<td>(.+?)</td>',tr,re.DOTALL)
        tds=[td.strip().decode('gbk') for td in tds]
        conn.execute('insert into manifest values(?,?,?,?,?,?,?,?,?,?,?,?)',tds)

    # if len(trs)>=10:
        # print len(trs),billNo
        # print trs

conn=sqlite3.connect(r'g:\web\nbeport\nbeport.db3')
conn.text_factory = str
conn.row_factory = sqlite3.Row
for x in getBillNo():
    # print x
    getQuery(x[0].strip())
conn.commit()
conn.close()

'''